from lxml import etree

# XPath walkthrough: load a document with lxml and run a handful of
# absolute and relative queries against it, printing each result list.

# etree.parse() is called without an explicit parser, so lxml's default
# XML parser is used; the input file therefore has to be well-formed.
print('-------------------- xml 文件用 etree.parse() --------------------')
tree = etree.parse("xpathDemo2.html")

# A [n] predicate selects by position; XPath positions are 1-based,
# so li[1] is the first <li> under the <ul>.
print('-------------------- [] 表索引，xpath 索引从 1 开始 --------------------')
result = tree.xpath('/html/body/ul/li[1]/a/text()')
print(result)

# An [@attr="value"] predicate keeps only the elements whose attribute
# matches; here, the <a> whose href equals "dapao".
print('-------------------- [@xxx=xxx] 实现属性筛选 --------------------')
result = tree.xpath('/html/body/ol/li/a[@href="dapao"]/text()')
print(result)

# Collect every <li> under the <ol>; each one serves as the context node
# for the relative queries below.
ol_li_list = tree.xpath('/html/body/ol/li')

# The three banners are emitted together, before any per-item output.
for banner in (
    '-------------------- 相对查找 ./ 表示从当前标签开始查找 --------------------',
    '-------------------- text() 获取文本 --------------------',
    '-------------------- @attrname 获取属性值 --------------------',
):
    print(banner)

# "./" anchors the query at the current <li>: first the link text
# (text()), then the link target (@href).
for li in ol_li_list:
    for relative_path in ("./a/text()", "./a/@href"):
        result = li.xpath(relative_path)
        print(result)
